import numpy as np
import pandas as pd
df = pd.read_csv("heart_disease_uci.csv")
df.head()
| id | age | sex | dataset | cp | trestbps | chol | fbs | restecg | thalch | exang | oldpeak | slope | ca | thal | num | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 63 | Male | Cleveland | typical angina | 145.0 | 233.0 | True | lv hypertrophy | 150.0 | False | 2.3 | downsloping | 0.0 | fixed defect | 0 |
| 1 | 2 | 67 | Male | Cleveland | asymptomatic | 160.0 | 286.0 | False | lv hypertrophy | 108.0 | True | 1.5 | flat | 3.0 | normal | 2 |
| 2 | 3 | 67 | Male | Cleveland | asymptomatic | 120.0 | 229.0 | False | lv hypertrophy | 129.0 | True | 2.6 | flat | 2.0 | reversable defect | 1 |
| 3 | 4 | 37 | Male | Cleveland | non-anginal | 130.0 | 250.0 | False | normal | 187.0 | False | 3.5 | downsloping | 0.0 | normal | 0 |
| 4 | 5 | 41 | Female | Cleveland | atypical angina | 130.0 | 204.0 | False | lv hypertrophy | 172.0 | False | 1.4 | upsloping | 0.0 | normal | 0 |
df.tail()
| id | age | sex | dataset | cp | trestbps | chol | fbs | restecg | thalch | exang | oldpeak | slope | ca | thal | num | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 915 | 916 | 54 | Female | VA Long Beach | asymptomatic | 127.0 | 333.0 | True | st-t abnormality | 154.0 | False | 0.0 | NaN | NaN | NaN | 1 |
| 916 | 917 | 62 | Male | VA Long Beach | typical angina | NaN | 139.0 | False | st-t abnormality | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 917 | 918 | 55 | Male | VA Long Beach | asymptomatic | 122.0 | 223.0 | True | st-t abnormality | 100.0 | False | 0.0 | NaN | NaN | fixed defect | 2 |
| 918 | 919 | 58 | Male | VA Long Beach | asymptomatic | NaN | 385.0 | True | lv hypertrophy | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 919 | 920 | 62 | Male | VA Long Beach | atypical angina | 120.0 | 254.0 | False | lv hypertrophy | 93.0 | True | 0.0 | NaN | NaN | NaN | 1 |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 920 entries, 0 to 919 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 920 non-null int64 1 age 920 non-null int64 2 sex 920 non-null object 3 dataset 920 non-null object 4 cp 920 non-null object 5 trestbps 861 non-null float64 6 chol 890 non-null float64 7 fbs 830 non-null object 8 restecg 918 non-null object 9 thalch 865 non-null float64 10 exang 865 non-null object 11 oldpeak 858 non-null float64 12 slope 611 non-null object 13 ca 309 non-null float64 14 thal 434 non-null object 15 num 920 non-null int64 dtypes: float64(5), int64(3), object(8) memory usage: 115.1+ KB
print(f"Record: {df.shape[0]}")
print(f"Columns : {df.shape[1]}")
Record: 920 Columns : 16
df.shape
(920, 16)
df.isna().sum()
id 0 age 0 sex 0 dataset 0 cp 0 trestbps 59 chol 30 fbs 90 restecg 2 thalch 55 exang 55 oldpeak 62 slope 309 ca 611 thal 486 num 0 dtype: int64
duplicates = df[df.duplicated()].value_counts()
print(duplicates)
Series([], Name: count, dtype: int64)
duplicates_all = df[df.duplicated(keep=False)]
print(duplicates_all)
Empty DataFrame Columns: [id, age, sex, dataset, cp, trestbps, chol, fbs, restecg, thalch, exang, oldpeak, slope, ca, thal, num] Index: []
remove_duplicates = df.drop_duplicates()
print(remove_duplicates)
id age sex dataset cp trestbps chol fbs \
0 1 63 Male Cleveland typical angina 145.0 233.0 True
1 2 67 Male Cleveland asymptomatic 160.0 286.0 False
2 3 67 Male Cleveland asymptomatic 120.0 229.0 False
3 4 37 Male Cleveland non-anginal 130.0 250.0 False
4 5 41 Female Cleveland atypical angina 130.0 204.0 False
.. ... ... ... ... ... ... ... ...
915 916 54 Female VA Long Beach asymptomatic 127.0 333.0 True
916 917 62 Male VA Long Beach typical angina NaN 139.0 False
917 918 55 Male VA Long Beach asymptomatic 122.0 223.0 True
918 919 58 Male VA Long Beach asymptomatic NaN 385.0 True
919 920 62 Male VA Long Beach atypical angina 120.0 254.0 False
restecg thalch exang oldpeak slope ca \
0 lv hypertrophy 150.0 False 2.3 downsloping 0.0
1 lv hypertrophy 108.0 True 1.5 flat 3.0
2 lv hypertrophy 129.0 True 2.6 flat 2.0
3 normal 187.0 False 3.5 downsloping 0.0
4 lv hypertrophy 172.0 False 1.4 upsloping 0.0
.. ... ... ... ... ... ...
915 st-t abnormality 154.0 False 0.0 NaN NaN
916 st-t abnormality NaN NaN NaN NaN NaN
917 st-t abnormality 100.0 False 0.0 NaN NaN
918 lv hypertrophy NaN NaN NaN NaN NaN
919 lv hypertrophy 93.0 True 0.0 NaN NaN
thal num
0 fixed defect 0
1 normal 2
2 reversable defect 1
3 normal 0
4 normal 0
.. ... ...
915 NaN 1
916 NaN 0
917 fixed defect 2
918 NaN 0
919 NaN 1
[920 rows x 16 columns]
df.shape
(920, 16)
import matplotlib.pyplot as plt
import seaborn as sns
# Heatmap of the boolean df.isnull() mask (colour bar hidden) to show where
# values are missing across rows and columns.
sns.heatmap(df.isnull(),cbar = False)
plt.title("missing values")
plt.show()
def fill_na(df):
    """Fill missing values in every column of ``df``, modifying it in place.

    Floating-point columns are filled with their median rounded to one
    decimal place; all other columns are filled with their most frequent
    value (the first mode).

    Args:
        df: DataFrame to clean. Its columns are reassigned in place.

    Returns:
        None.
    """
    for column in df.columns:
        series = df[column]
        if not series.isna().any():
            continue

        # NumPy integer columns cannot hold NaN, so in practice only float
        # columns reach the median branch.
        if pd.api.types.is_float_dtype(series.dtype):
            fill_value = round(series.median(), 1)
        else:
            modes = series.mode()
            if modes.empty:
                # Column is entirely missing: there is no mode to fill with.
                continue
            fill_value = modes[0]

        # Assign the filled column back instead of calling
        # df[column].fillna(..., inplace=True): the chained in-place form
        # operates on a temporary and does not update df under pandas
        # Copy-on-Write.
        df[column] = series.fillna(fill_value)
fill_na(df)
df.isna().sum()
id 0 age 0 sex 0 dataset 0 cp 0 trestbps 0 chol 0 fbs 0 restecg 0 thalch 0 exang 0 oldpeak 0 slope 0 ca 0 thal 0 num 0 dtype: int64
top_legend = df["cp"].value_counts()
print(top_legend)
cp asymptomatic 496 non-anginal 204 atypical angina 174 typical angina 46 Name: count, dtype: int64
top_legend = df["cp"].value_counts().nlargest(4).index
print(top_legend)
Index(['asymptomatic', 'non-anginal', 'atypical angina', 'typical angina'], dtype='object', name='cp')
# Age vs. cholesterol scatter, coloured by chest-pain type, restricted to the
# four most frequent `cp` categories.
top_legend = df["cp"].value_counts().nlargest(4).index
display(top_legend)

plt.figure(figsize=(15, 6))
sns.scatterplot(
    data=df.loc[df["cp"].isin(top_legend)],
    x="age",
    y="chol",
    hue="cp",
)
plt.title("age vs chlo scatter plot")
plt.xlabel("age")
plt.ylabel("chol")
# Anchor the legend just outside the right edge of the axes.
plt.legend(title="chest pain type", loc="upper left", bbox_to_anchor=(1.05, 1))
plt.show()
Index(['asymptomatic', 'non-anginal', 'atypical angina', 'typical angina'], dtype='object', name='cp')
# Age vs. cholesterol scatter, coloured by sex (at most the four most
# frequent `sex` values are kept).
top_legend = df["sex"].value_counts().nlargest(4).index
display(top_legend)

plt.figure(figsize=(15, 6))
sns.scatterplot(
    data=df.loc[df["sex"].isin(top_legend)],
    x="age",
    y="chol",
    hue="sex",
)
plt.title("age vs chlo scatter plot")
plt.xlabel("age")
plt.ylabel("chol")
# Anchor the legend just outside the right edge of the axes.
plt.legend(title="gender type", loc="upper left", bbox_to_anchor=(1.05, 1))
plt.show()
Index(['Male', 'Female'], dtype='object', name='sex')
# Numerical columns: for each one, draw a histogram (with KDE) and a box plot
# side by side in a single 1x2 figure.
numerical_cols = ["age","chol","trestbps","thalch","oldpeak","ca","num"]
for col in numerical_cols:
    # One figure per column, holding both panels.
    plt.figure(figsize=(15, 6))

    # Left panel: distribution of the column.
    plt.subplot(1, 2, 1)
    sns.histplot(data=df[col], kde=True, color="skyblue")
    plt.title(f"Distibution of {col}")

    # Right panel: box plot of the same column. The figure is shown only
    # after both panels are drawn; a plt.show() between them would flush the
    # figure and push the box plot into a separate, default-sized figure.
    plt.subplot(1, 2, 2)
    sns.boxplot(x=col, data=df, color="purple")
    plt.title(f"Box plot of {col}")

    plt.tight_layout()
    plt.show()
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
# Categorical columns: one count plot per column, with the bars ordered by
# the column's value_counts() index.
categorical_col = [
    "sex",
    "dataset",
    "cp",
    "fbs",
    "restecg",
    "exang",
    "slope",
    "thal",
]
for col in categorical_col:
    # A fresh, wide figure for each column.
    plt.figure(figsize=(15, 4))
    sns.countplot(
        data=df,
        x=col,
        order=df[col].value_counts().index,
        palette="Set2",
    )
    # Rotate the category labels so long names do not overlap.
    plt.xticks(rotation=90)
    plt.title(f"Count of {col}")
    plt.tight_layout()
    plt.show()
import plotly.express as px
# Interactive age vs. cholesterol scatter, coloured by sex.
fig = px.scatter(df, x="age", y="chol", color="sex")
fig.update_layout(
    width=1000,
    height=800,
    title="scatter plot ofAge vs chol (colored by gender)",
)
fig.show()
from plotly.offline import iplot
# Interactive box plot of age.
# The figure is bound to `fig`, not `plt`: `plt` is the matplotlib.pyplot
# alias used by other cells in this file, and rebinding it to a plotly figure
# would break every later plt.figure()/plt.title()/plt.show() call.
fig = px.box(
    x=df["age"],
    labels={"x": "age"},
    title="5 - number summary of (box plot) age",
)
fig.show()
# Interactive box plot of oldpeak.
fig = px.box(
    x=df["oldpeak"],
    labels={"x": "oldpeak"},
    title="5 - Number summary of (box plot) oldpeak",
)
fig.show()
import plotly.express as px
# Interactive scatter of cholesterol (x) against age (y), coloured by
# chest-pain type; hovering a point shows its `exang` value as the heading.
fig = px.scatter(
    df,
    x="chol",
    y="age",
    color="cp",
    size_max=30,
    hover_name="exang",
)
# The title names the variables actually plotted: chol on x, age on y, and
# colour taken from `cp` (not oldpeak).
fig.update_layout(
    width=1000,
    height=500,
    title_text="Scatter Plot of Cholesterol vs. Age (colored by cp)",
)
fig.show()
import plotly.express as px
# Interactive scatter of cholesterol (x) against age (y), coloured by
# chest-pain type; hovering a point shows its `exang` value as the heading.
fig = px.scatter(
    df,
    x="chol",
    y="age",
    color="cp",
    size_max=30,
    hover_name="exang",
)
# The title names the variables actually plotted: chol on x, age on y, and
# colour taken from `cp` (not oldpeak).
fig.update_layout(
    width=1000,
    height=500,
    title_text="Scatter Plot of Cholesterol vs. Age (colored by cp)",
)
fig.show()
import plotly.express as px
# Interactive scatter of cholesterol (x) against age (y), coloured by
# chest-pain type; hovering a point shows its `exang` value as the heading.
fig = px.scatter(
    df,
    x="chol",
    y="age",
    color="cp",
    size_max=30,
    hover_name="exang",
)
fig.update_layout(
    width=1000,
    height=500,
    title_text="Scatter Plot of Cholesterol vs. Age (colored by cp)",
)
fig.show()
# Keep only the rows whose oldpeak is present and non-negative, then print
# the filtered frame.
# NOTE(review): presumably required because oldpeak is used as a marker size
# in the scatter plot that follows — confirm.
df_clean = df[df['oldpeak'].notna() & (df['oldpeak'] >= 0)]
print(df_clean)
import plotly.express as px
# Cholesterol (x) vs. age (y) on the filtered frame: colour encodes
# chest-pain type, marker size encodes oldpeak (capped at 30 px).
fig = px.scatter(
    df_clean, x="chol", y="age", color="cp",
    size="oldpeak", size_max=30, hover_name="exang",
)
fig.update_layout(
    width=1000,
    height=800,
    title_text="Scatter Plot of Cholesterol vs. Age (colored by cp)",
)
fig.show()
id age sex dataset cp trestbps chol fbs \
0 1 63 Male Cleveland typical angina 145.0 233.0 True
1 2 67 Male Cleveland asymptomatic 160.0 286.0 False
2 3 67 Male Cleveland asymptomatic 120.0 229.0 False
3 4 37 Male Cleveland non-anginal 130.0 250.0 False
4 5 41 Female Cleveland atypical angina 130.0 204.0 False
.. ... ... ... ... ... ... ... ...
915 916 54 Female VA Long Beach asymptomatic 127.0 333.0 True
916 917 62 Male VA Long Beach typical angina 130.0 139.0 False
917 918 55 Male VA Long Beach asymptomatic 122.0 223.0 True
918 919 58 Male VA Long Beach asymptomatic 130.0 385.0 True
919 920 62 Male VA Long Beach atypical angina 120.0 254.0 False
restecg thalch exang oldpeak slope ca \
0 lv hypertrophy 150.0 False 2.3 downsloping 0.0
1 lv hypertrophy 108.0 True 1.5 flat 3.0
2 lv hypertrophy 129.0 True 2.6 flat 2.0
3 normal 187.0 False 3.5 downsloping 0.0
4 lv hypertrophy 172.0 False 1.4 upsloping 0.0
.. ... ... ... ... ... ...
915 st-t abnormality 154.0 False 0.0 flat 0.0
916 st-t abnormality 140.0 False 0.5 flat 0.0
917 st-t abnormality 100.0 False 0.0 flat 0.0
918 lv hypertrophy 140.0 False 0.5 flat 0.0
919 lv hypertrophy 93.0 True 0.0 flat 0.0
thal num
0 fixed defect 0
1 normal 2
2 reversable defect 1
3 normal 0
4 normal 0
.. ... ...
915 normal 1
916 normal 0
917 fixed defect 2
918 normal 0
919 normal 1
[908 rows x 16 columns]
# Age (x) vs. cholesterol (y) on the filtered frame: colour encodes
# chest-pain type (`cp`) and marker size encodes `ca`.
fig = px.scatter(
    df_clean,
    x="age",
    y="chol",
    color="cp",
    size="ca",
)
# The title states the real encodings: colour comes from `cp`, while `ca`
# only drives the marker size.
fig.update_layout(
    title="scatter plot of age vs chol (colored by cp, sized by ca)",
    width=1000,
    height=800,
)
fig.show()
# Age (x) vs. cholesterol (y) on the filtered frame: colour encodes
# chest-pain type (`cp`), marker size encodes `ca`, and each point's hover
# heading is its oldpeak value.
fig = px.scatter(
    df_clean,
    x="age",
    y="chol",
    color="cp",
    size="ca",
    hover_name="oldpeak",
)
# The title states the real encodings: colour comes from `cp`, while `ca`
# only drives the marker size.
fig.update_layout(
    title="scatter plot of age vs chol (colored by cp, sized by ca)",
    width=1000,
    height=800,
)
fig.show()
def remove_outliers_iqr(df, column):
    """Return the rows of ``df`` that are not IQR outliers in ``column``.

    A value is kept when it lies within ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]``
    of its own column. When several columns are given, a row is kept only if
    it is within bounds in every one of them. Rows with a missing value in
    any checked column are dropped, because comparisons with NaN are False.

    Args:
        df: Source DataFrame; it is not modified.
        column: A single column label or a list-like of column labels.

    Returns:
        A DataFrame containing the retained rows, with all original columns
        and the original index labels.
    """
    # Normalise to a list so one code path serves both a single label and
    # several labels.
    columns = list(column) if pd.api.types.is_list_like(column) else [column]
    subset = df[columns]

    q1 = subset.quantile(0.25)
    q3 = subset.quantile(0.75)
    # Interquartile range, computed per column.
    iqr = q3 - q1

    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # Element-wise bounds check, collapsed to one boolean per row.
    within_bounds = ((subset >= lower_bound) & (subset <= upper_bound)).all(axis=1)

    # Return the filtered rows themselves rather than the mask.
    return df[within_bounds]
# Columns passed to the IQR outlier screen; the function's result is stored
# in data_clean and printed.
column = ["chol","trestbps","thalch","oldpeak","ca"]
data_clean = remove_outliers_iqr(df,column)
print(data_clean)
[ chol trestbps thalch oldpeak ca 0 True True True True True 1 True True True True False 2 True True True True False 3 True True True True True 4 True True True True True .. ... ... ... ... ... 915 True True True True True 916 True True True True True 917 True True True True True 918 True True True True True 919 True True True True True [920 rows x 5 columns]]
# bar plot using plotly
import plotly.express as px
# Bar chart of chol against age, coloured by sex; hovering a bar segment
# shows its `num` value as the heading.
fig = px.bar(
    df,
    x="age",
    y="chol",
    color="sex",
    hover_name="num",
    height=500,
)
fig.update_layout(title="Bar plot of age wise chol count")
fig.show()
# Overlay one KDE curve per numeric feature on a single axes.
# Only numeric columns are plotted: sns.kdeplot raises
# "TypeError: The x variable is categorical" for object/string columns.
plt.figure(figsize=(8, 5))
for col in df.select_dtypes(include="number").columns:
    sns.kdeplot(df[col], label=col)

plt.title("Feature Distributions After Cleaning", fontsize=14)
plt.xlabel("Value")
plt.legend()
plt.show()
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[36], line 3 1 plt.figure(figsize=(8,5)) 2 for col in df.columns: ----> 3 sns.kdeplot(df[col], label=col) 5 plt.title("Feature Distributions After Cleaning", fontsize=14) 6 plt.xlabel("Value") File D:\Users\tamilarasi\Lib\site-packages\seaborn\distributions.py:1695, in kdeplot(data, x, y, hue, weights, palette, hue_order, hue_norm, color, fill, multiple, common_norm, common_grid, cumulative, bw_method, bw_adjust, warn_singular, log_scale, levels, thresh, gridsize, cut, clip, legend, cbar, cbar_ax, cbar_kws, ax, **kwargs) 1692 if ax is None: 1693 ax = plt.gca() -> 1695 p._attach(ax, allowed_types=["numeric", "datetime"], log_scale=log_scale) 1697 method = ax.fill_between if fill else ax.plot 1698 color = _default_color(method, hue, color, kwargs) File D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1211, in VectorPlotter._attach(self, obj, allowed_types, log_scale) 1206 if var_type not in allowed_types: 1207 err = ( 1208 f"The {var} variable is {var_type}, but one of " 1209 f"{allowed_types} is required" 1210 ) -> 1211 raise TypeError(err) 1213 # -- Get axis objects for each row in plot_data for type conversions and scaling 1215 facet_dim = {"x": "col", "y": "row"} TypeError: The x variable is categorical, but one of ['numeric', 'datetime'] is required
import matplotlib.pyplot as plt
import seaborn as sns
# Restrict the KDE overlay to the numeric columns of df.
numeric_cols = df.select_dtypes(include="number").columns

plt.figure(figsize=(8, 5))
# One labelled density curve per numeric column, all on the same axes.
for col in numeric_cols:
    sns.kdeplot(df[col], label=col)

plt.title("Feature Distributions After Cleaning", fontsize=14)
plt.xlabel("Value")
plt.legend()
plt.show()
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
sns.scatterplot(data = df,x = "cp",y = "chol",hue = "sex")
<Axes: xlabel='cp', ylabel='chol'>
sns.stripplot(data = df,x = "cp",y = "age",hue = "sex",jitter = False)
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
<Axes: xlabel='cp', ylabel='age'>
sns.catplot(data = df,x = "cp",y = "age",kind = "strip",hue = "sex")
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
<seaborn.axisgrid.FacetGrid at 0x17a6512e0d0>
sns.swarmplot(data = df,x = "cp",y = "age")
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning: 10.9% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
<Axes: xlabel='cp', ylabel='age'>
D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning: 24.8% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
sns.catplot(data = df,x = "cp",y = "chol",kind = "swarm")
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning: 47.4% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning: 19.6% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning: 8.6% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
<seaborn.axisgrid.FacetGrid at 0x17a63980c10>
D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning: 55.8% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning: 29.9% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning: 16.7% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning: 56.5% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
sns.swarmplot(data = df,x = "cp",y = "age",hue= "sex")
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning: 10.9% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
<Axes: xlabel='cp', ylabel='age'>
D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning: 24.8% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
sns.boxplot(data = df,x = "cp",y = "age")
<Axes: xlabel='cp', ylabel='age'>
sns.boxplot(data = df,x = "cp",y = "chol")
<Axes: xlabel='cp', ylabel='chol'>
sns.boxplot(data = df,x= "cp",y = "age",hue = "sex")
<Axes: xlabel='cp', ylabel='age'>
sns.boxplot(data = df,x = "cp",y = "chol",hue = "sex")
<Axes: xlabel='cp', ylabel='chol'>
sns.boxplot(data = df,x = "restecg",y = "thalch",hue = "fbs",color = "yellow")
<Axes: xlabel='restecg', ylabel='thalch'>
df.head()
| id | age | sex | dataset | cp | trestbps | chol | fbs | restecg | thalch | exang | oldpeak | slope | ca | thal | num | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 63 | Male | Cleveland | typical angina | 145.0 | 233.0 | True | lv hypertrophy | 150.0 | False | 2.3 | downsloping | 0.0 | fixed defect | 0 |
| 1 | 2 | 67 | Male | Cleveland | asymptomatic | 160.0 | 286.0 | False | lv hypertrophy | 108.0 | True | 1.5 | flat | 3.0 | normal | 2 |
| 2 | 3 | 67 | Male | Cleveland | asymptomatic | 120.0 | 229.0 | False | lv hypertrophy | 129.0 | True | 2.6 | flat | 2.0 | reversable defect | 1 |
| 3 | 4 | 37 | Male | Cleveland | non-anginal | 130.0 | 250.0 | False | normal | 187.0 | False | 3.5 | downsloping | 0.0 | normal | 0 |
| 4 | 5 | 41 | Female | Cleveland | atypical angina | 130.0 | 204.0 | False | lv hypertrophy | 172.0 | False | 1.4 | upsloping | 0.0 | normal | 0 |
sns.boxplot(data = df,y = "oldpeak")
<Axes: ylabel='oldpeak'>
# Violinplot = (Boxplot + KDEplot)
sns.violinplot(data = df,x = "num",y = "age")
<Axes: xlabel='num', ylabel='age'>
sns.violinplot(data = df,x = "cp",y = "age")
<Axes: xlabel='cp', ylabel='age'>
sns.catplot(data = df, x = "cp",y = "age",kind = "violin",hue = "sex",split = True,color = "skyblue")
<seaborn.axisgrid.FacetGrid at 0x17a65830c10>
sns.catplot(data = df,x = "cp",y = "age",kind = "violin",hue = "sex",color = "purple")
<seaborn.axisgrid.FacetGrid at 0x17a65780090>
sns.barplot(data = df,x = "cp" ,y = "age")
<Axes: xlabel='cp', ylabel='age'>
sns.barplot(data = df,x = "fbs",y = "age")
<Axes: xlabel='fbs', ylabel='age'>
sns.barplot(data = df,x = "dataset",y = "chol")
<Axes: xlabel='dataset', ylabel='chol'>
sns.barplot(data = df,x = "dataset",y = "age",hue = "sex",color = "purple")
<Axes: xlabel='dataset', ylabel='age'>
sns.catplot(data = df,x = "sex",y = "age",hue = "fbs",kind = "box",color = "purple")
<seaborn.axisgrid.FacetGrid at 0x17a66d7c550>
sns.regplot(data = df,x = "age",y = "chol",color = "red")
<Axes: xlabel='age', ylabel='chol'>
sns.lmplot(data = df,x= "age",y = "chol",hue = "sex")
<seaborn.axisgrid.FacetGrid at 0x17a67019390>
sns.lmplot(data = df,x = "ca",y = "num",hue = "fbs")
<seaborn.axisgrid.FacetGrid at 0x17a66f5b790>
sns.residplot(data = df,x = "chol",y = "thalch")
<Axes: xlabel='chol', ylabel='thalch'>
sns.catplot(data = df,x = "sex",y = "age",kind = "violin")
<seaborn.axisgrid.FacetGrid at 0x17a6712f890>
sns.catplot(data = df,
x = "sex",
y = "chol",
kind = "violin")
<seaborn.axisgrid.FacetGrid at 0x17a671a8f90>